# Computations
import pandas as pd
import numpy as np
# sklearn
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
# KMeans
from sklearn.cluster import KMeans
# preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
# Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## seaborn
import seaborn as sns
sns.set_style('whitegrid')
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
## matplotlib
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
import matplotlib.gridspec as gridspec
import matplotlib.colors
from pylab import rcParams
from matplotlib.font_manager import FontProperties
plt.rcParams['figure.figsize'] = 14, 8
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
# %config InlineBackend.figure_format = 'retina'
import warnings
warnings.filterwarnings("ignore")
In this article, we analyze a weather dataset from Kaggle.com.
Data description from Kaggle:
Data = pd.read_csv('weatherdata/daily_weather.csv')
Data.drop(columns = ['number'], inplace = True)
Data.head().style.hide_index().set_precision(2)
| air_pressure_9am | air_temp_9am | avg_wind_direction_9am | avg_wind_speed_9am | max_wind_direction_9am | max_wind_speed_9am | rain_accumulation_9am | rain_duration_9am | relative_humidity_9am | relative_humidity_3pm |
|---|---|---|---|---|---|---|---|---|---|
| 918.06 | 74.82 | 271.10 | 2.08 | 295.40 | 2.86 | 0.00 | 0.00 | 42.42 | 36.16 |
| 917.35 | 71.40 | 101.94 | 2.44 | 140.47 | 3.53 | 0.00 | 0.00 | 24.33 | 19.43 |
| 923.04 | 60.64 | 51.00 | 17.07 | 63.70 | 22.10 | 0.00 | 20.00 | 8.90 | 14.46 |
| 920.50 | 70.14 | 198.83 | 4.34 | 211.20 | 5.19 | 0.00 | 0.00 | 12.19 | 12.74 |
| 921.16 | 44.29 | 277.80 | 1.86 | 136.50 | 2.86 | 8.90 | 14730.00 | 92.41 | 76.74 |
| Columns | Description |
|---|---|
| Air Pressure | Air pressure StartFragment in hectopascal (100 pascals) at 9 AM |
| Air Temperature | Air temperature in degrees Fahrenheit at 9 AM |
| Avg Wind Direction | Average wind direction over the minute before the timestamp in degrees (0 starts from the north) at 9 AM |
| Avg Wind Speed | Average wind speed over the minute before the timestamp in meter per seconds (m/s) at 9 AM |
| Max Wind Direction | Highest wind direction in the minute before the timestamp in degrees (0 starts from the north) at 9 AM |
| Max Wind Speed | Highest wind speed in the minute before the timestamp in meter per seconds (m/s) at 9 AM |
| Min Wind Speed | Smallest wind speed in the minute before the timestamp in meter per seconds (m/s) at 9 AM |
| Rain Accumulation | Accumulated rain in millimeters (mm) at 9 AM |
| Rain Duration | Length of time rain in seconds (s) at 9 AM |
| Relative Humidity (Morning) | Relative humidity in percentage in at 9 AM |
| Relative Humidity (Afternoon) | Relative humidity in percentage at 3 PM |
For convenience, we would like to modify the feature names.
Data.columns = [x.replace('ty_9am','ty_(Morning)').replace('3pm', '(Afternoon)').replace('_9am', '').replace('_',
' ').title().replace('Temp','Temperature') for x in Data.columns.tolist()]
Data.head(5).style.hide_index().set_precision(2)
| Air Pressure | Air Temperature | Avg Wind Direction | Avg Wind Speed | Max Wind Direction | Max Wind Speed | Rain Accumulation | Rain Duration | Relative Humidity (Morning) | Relative Humidity (Afternoon) |
|---|---|---|---|---|---|---|---|---|---|
| 918.06 | 74.82 | 271.10 | 2.08 | 295.40 | 2.86 | 0.00 | 0.00 | 42.42 | 36.16 |
| 917.35 | 71.40 | 101.94 | 2.44 | 140.47 | 3.53 | 0.00 | 0.00 | 24.33 | 19.43 |
| 923.04 | 60.64 | 51.00 | 17.07 | 63.70 | 22.10 | 0.00 | 20.00 | 8.90 | 14.46 |
| 920.50 | 70.14 | 198.83 | 4.34 | 211.20 | 5.19 | 0.00 | 0.00 | 12.19 | 12.74 |
| 921.16 | 44.29 | 277.80 | 1.86 | 136.50 | 2.86 | 8.90 | 14730.00 | 92.41 | 76.74 |
def Data_info(Inp, Only_NaN = False):
Out = Inp.dtypes.to_frame(name='Data Type').sort_values(by=['Data Type'])
Out = Out.join(Inp.isnull().sum().to_frame(name = 'Number of NaN Values'), how='outer')
Out['Percentage'] = np.round(100*(Out['Number of NaN Values']/Inp.shape[0]),2)
if Only_NaN:
Out = Out.loc[Out['Number of NaN Values']>0]
return Out
Temp = Data_info(Data, Only_NaN = True)
display(Temp)
Temp = Temp.index.tolist()
| Data Type | Number of NaN Values | Percentage | |
|---|---|---|---|
| Air Pressure | float64 | 3 | 0.27 |
| Air Temperature | float64 | 5 | 0.46 |
| Avg Wind Direction | float64 | 4 | 0.37 |
| Avg Wind Speed | float64 | 3 | 0.27 |
| Max Wind Direction | float64 | 3 | 0.27 |
| Max Wind Speed | float64 | 4 | 0.37 |
| Rain Accumulation | float64 | 6 | 0.55 |
| Rain Duration | float64 | 3 | 0.27 |
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
Data[Temp] = imp.fit_transform(Data[Temp])
Data_info(Data)
| Data Type | Number of NaN Values | Percentage | |
|---|---|---|---|
| Air Pressure | float64 | 0 | 0.0 |
| Air Temperature | float64 | 0 | 0.0 |
| Avg Wind Direction | float64 | 0 | 0.0 |
| Avg Wind Speed | float64 | 0 | 0.0 |
| Max Wind Direction | float64 | 0 | 0.0 |
| Max Wind Speed | float64 | 0 | 0.0 |
| Rain Accumulation | float64 | 0 | 0.0 |
| Rain Duration | float64 | 0 | 0.0 |
| Relative Humidity (Morning) | float64 | 0 | 0.0 |
| Relative Humidity (Afternoon) | float64 | 0 | 0.0 |
Let's set Relative Humidity (Afternoon) as the target variable. This means given the dataset and using the rest of the features, we would like to know whether is humid or not at 3 PM. In doing so, define a Humidity Level (Afternoon) feature as follows:
N = 4
Target = 'Humidity Level (Afternoon)'
Data[Target], bins = pd.qcut(Data['Relative Humidity (Afternoon)'], precision =2, retbins= True, q=4, labels=np.arange(0, 4, 1))
df = Data.drop(columns = ['Relative Humidity (Afternoon)'])
Range_dict = dict(list(enumerate(['(%.2f, %.2f]' % (bins[i], bins[i+1]) for i in range(N)])))
del bins
Furthemore, let's look at the variance of our dataset features.
display(df.iloc[:,:-1].var().sort_values(ascending = False).to_frame(name= 'Variance')\
.style.background_gradient(cmap='OrRd').set_precision(2))
fig, ax = plt.subplots(figsize=(10,10))
Temp = df.drop(columns = [Target]).var().sort_values(ascending = False).to_frame(name= 'Variance').round(2).T
_ = sns.heatmap(Temp, ax=ax, annot=True, square=True, cmap =sns.color_palette("OrRd", 20),
linewidths = 0.8, vmin=0, vmax=Temp.max(axis =1)[0], annot_kws={"size": 12.5},
cbar_kws={'label': 'Feature Variance', "aspect":80, "shrink": .4, "orientation": "horizontal"})
lb = [x.replace(' ','\n').replace('\nof\n',' of\n') for x in [item.get_text() for item in ax.get_xticklabels()]]
_ = ax.set_xticklabels(lb)
_ = ax.set_yticklabels('')
del Temp
| Variance | |
|---|---|
| Rain Duration | 2546852.52 |
| Avg Wind Direction | 4762.57 |
| Max Wind Direction | 4508.55 |
| Relative Humidity (Morning) | 648.83 |
| Air Temperature | 124.32 |
| Max Wind Speed | 31.23 |
| Avg Wind Speed | 20.67 |
| Air Pressure | 10.11 |
| Rain Accumulation | 2.53 |
Furthermore, we would like to standardize features by removing the mean and scaling to unit variance. In this article, we demonstrated the benefits of scaling data using StandardScaler().
scaler = StandardScaler()
df.iloc[:,:-1] = scaler.fit_transform(df.iloc[:,:-1])
display(df.iloc[:,:-1].var().sort_values(ascending = False).to_frame(name= 'Variance')\
.style.background_gradient(cmap=sns.light_palette("green", as_cmap=True)).set_precision(2))
fig, ax = plt.subplots(figsize=(10,10))
Temp = df.drop(columns = [Target]).var().sort_values(ascending = False).to_frame(name= 'Variance').round(2).T
_ = sns.heatmap(Temp, ax=ax, annot=True, square=True, cmap =sns.color_palette("Greens", 20),
linewidths = 0.8, vmin=0, vmax=Temp.max(axis =1)[0], annot_kws={"size": 12.5},
cbar_kws={'label': 'Feature Variance', "aspect":80, "shrink": .4, "orientation": "horizontal"})
lb = [x.replace(' ','\n').replace('\nof\n',' of\n') for x in [item.get_text() for item in ax.get_xticklabels()]]
_ = ax.set_xticklabels(lb)
_ = ax.set_yticklabels('')
del Temp
| Variance | |
|---|---|
| Rain Duration | 1.00 |
| Air Temperature | 1.00 |
| Max Wind Direction | 1.00 |
| Avg Wind Direction | 1.00 |
| Max Wind Speed | 1.00 |
| Avg Wind Speed | 1.00 |
| Relative Humidity (Morning) | 1.00 |
| Air Pressure | 1.00 |
| Rain Accumulation | 1.00 |
df.describe().style.set_precision(2)
| Air Pressure | Air Temperature | Avg Wind Direction | Avg Wind Speed | Max Wind Direction | Max Wind Speed | Rain Accumulation | Rain Duration | Relative Humidity (Morning) | |
|---|---|---|---|---|---|---|---|---|---|
| count | 1095.00 | 1095.00 | 1095.00 | 1095.00 | 1095.00 | 1095.00 | 1095.00 | 1095.00 | 1095.00 |
| mean | 0.00 | 0.00 | -0.00 | -0.00 | 0.00 | -0.00 | 0.00 | -0.00 | 0.00 |
| std | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| min | -3.43 | -2.53 | -1.84 | -1.06 | -1.79 | -1.04 | -0.13 | -0.18 | -1.11 |
| 25% | -0.73 | -0.68 | -1.10 | -0.72 | -1.08 | -0.71 | -0.13 | -0.18 | -0.75 |
| 50% | 0.01 | 0.07 | 0.34 | -0.36 | 0.42 | -0.37 | -0.13 | -0.18 | -0.43 |
| 75% | 0.72 | 0.76 | 0.71 | 0.40 | 0.78 | 0.33 | -0.13 | -0.18 | 0.44 |
| max | 3.28 | 3.05 | 2.92 | 3.97 | 2.43 | 4.09 | 14.99 | 10.91 | 2.29 |
def Correlation_Plot (Df,Fig_Size):
Correlation_Matrix = Df.corr().round(2)
mask = np.zeros_like(Correlation_Matrix)
mask[np.triu_indices_from(mask)] = True
for i in range(len(mask)):
mask[i,i]=0
Fig, ax = plt.subplots(figsize=(Fig_Size,Fig_Size))
sns.heatmap(Correlation_Matrix, ax=ax, mask=mask, annot=True, square=True,
cmap =sns.color_palette("Greens", n_colors=10), linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": .6})
Correlation_Plot (df, 8)
We can visualize the data using Parallel Coordinates.
Temp = df.copy()
Temp[Target] = Temp[Target].map(Range_dict)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize = (15, 8))
_ = pd.plotting.parallel_coordinates(Temp, class_column = Target, ax = ax,
color=["#3498db", "#e74c3c", "#34495e", "#2ecc71"], axvlines = True)
_ = ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
_ = ax.legend(title = Target, loc="upper left", fontsize = 12)
_ = ax.set_ylim([-5,20])
# _ = ax.axvline(linewidth=1, color='gray')
del Temp
However, the results of this visualization can be improved if a clustering method is used. For this reason, we K-Means clustering method.
kmeans = KMeans(n_clusters = N)
Temp = df.drop(columns = Target)
model = kmeans.fit(Temp)
Out = pd.DataFrame(model.cluster_centers_, columns = df.iloc[:,1:].columns.tolist())
Out[Target] = np.sort(df[Target].unique().tolist())
Out.style.hide_index().set_precision(4)
| Air Temperature | Avg Wind Direction | Avg Wind Speed | Max Wind Direction | Max Wind Speed | Rain Accumulation | Rain Duration | Relative Humidity (Morning) | Humidity Level (Afternoon) |
|---|---|---|---|---|---|---|---|---|
| 0.7966 | -0.1969 | -1.2639 | 1.0467 | -1.1531 | 1.0799 | -0.1254 | -0.1736 | 0 |
| -0.6278 | -1.1868 | 0.6093 | -0.0014 | 0.6765 | -0.0430 | -0.0191 | 0.1277 | 1 |
| -0.1202 | 0.6194 | 0.3625 | -0.5245 | 0.2860 | -0.5270 | -0.1207 | -0.1663 | 2 |
| -0.8182 | -1.6451 | 0.7018 | 0.2225 | 0.5142 | 0.3217 | 6.7740 | 7.1550 | 3 |
Temp = Out.copy()
Temp[Target] = Temp[Target].map(Range_dict)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize = (15, 8))
_ = pd.plotting.parallel_coordinates(Temp, class_column = Target, ax = ax,
color=["#3498db", "#e74c3c", "#34495e", "#2ecc71"], axvlines = True)
_ = ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
_ = ax.legend(title = Target, loc="upper left", fontsize = 12)
_ = ax.set_ylim([-4,10])
X = Data.drop(columns = [Target])
y = pd.get_dummies(Data[Target]).astype(int)
Test_Size = 0.3
def Sets_Plot(Data, Test_Size):
Temp = pd.DataFrame({'Set': ['Train', 'Test'],
'Number of Instances':[int(Data.shape[0]*(1-Test_Size)), int(Data.shape[0]*Test_Size)]})
Temp['Percentage'] = np.round(100* Temp['Number of Instances'].values /Temp['Number of Instances'].sum(), 2)
fig = px.bar(Temp, y= ['',''], x= 'Number of Instances', orientation='h', color = 'Set', text = 'Percentage',
color_discrete_sequence = ['PaleGreen', 'LightBlue'], height = 180)
fig.update_layout(plot_bgcolor= 'white', legend_orientation='h', legend=dict(x=0, y=1.7),
xaxis = dict(tickmode = 'array', tickvals = [0, Data.shape[0]], ticktext = ['','']))
fig.update_traces(marker_line_color= 'Black', marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2}% ', textposition='inside')
fig.update_xaxes(title_text=None, range=[0, Data.shape[0]])
fig.update_yaxes(title_text=None)
fig.show()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= Test_Size, random_state=42)
display(pd.DataFrame(data={'Set':['X_train','X_test','y_train','y_test'],
'Shape':[X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T)
Sets_Plot(Data, Test_Size)
| Set | X_train | X_test | y_train | y_test |
|---|---|---|---|---|
| Shape | (766, 10) | (329, 10) | (766, 4) | (329, 4) |
Here, we implement an Multi-layer Perceptron (MLP) for Multi-Class classification using Keras. For more details see Deep Learning from Note.
model = keras.Sequential(name = 'Multi_Class_MLP')
model.add(layers.Dense(12, input_dim = X.shape[1], activation='relu', name='Layer1'))
model.add(layers.Dense(10, activation='sigmoid', name='Layer2'))
model.add(layers.Dense(4, activation='sigmoid', name='Layer3'))
model.add(layers.Dense(y.shape[1], activation='sigmoid', name='Layer4'))
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True, expand_nested = True, rankdir = 'LR')
Model: "Multi_Class_MLP" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= Layer1 (Dense) (None, 12) 132 _________________________________________________________________ Layer2 (Dense) (None, 10) 130 _________________________________________________________________ Layer3 (Dense) (None, 4) 44 _________________________________________________________________ Layer4 (Dense) (None, 4) 20 ================================================================= Total params: 326 Trainable params: 326 Non-trainable params: 0 _________________________________________________________________
# Number of iterations
IT = int(1e3)+1
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy','mae', 'mse'])
# Train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs= IT, batch_size=128, verbose = 0)
def Search_List(Key, List): return [s for s in List if Key in s]
Metrics_Names = {'loss':'Loss', 'accuracy':'Accuracy', 'mae':'MAE', 'mse':'MSE'}
def Table_modify(df, Metrics_Names = Metrics_Names):
df = df.rename(columns = Metrics_Names)
df = df.reindex(sorted(df.columns), axis=1)
df.insert(loc = 0, column = 'Iteration', value = np.arange(0, df.shape[0]), allow_duplicates=False)
return df
Validation_Table = Search_List('val_',history.history.keys())
Train_Table = list(set( history.history.keys()) - set(Validation_Table))
Validation_Table = pd.DataFrame(np.array([history.history[x] for x in Validation_Table]).T, columns = Validation_Table)
Train_Table = pd.DataFrame(np.array([history.history[x] for x in Train_Table]).T, columns = Train_Table)
Validation_Table.columns = [x.replace('val_','') for x in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Train Set Score
score = model.evaluate(X_test, y_test, batch_size=128, verbose = 0)
score = pd.DataFrame(score, index = model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(X_train, y_train, batch_size=128, verbose = 0)
Temp = pd.DataFrame(Temp, index = model.metrics_names).T
Temp.index = ['Validation Set Score']
score = score.append(Temp)
score.rename(columns= Metrics_Names, inplace = True)
score = score.reindex(sorted(score.columns), axis=1)
display(score.style.set_precision(4))
| Accuracy | Loss | MAE | MSE | |
|---|---|---|---|---|
| Train Set Score | 0.9392 | 0.3179 | 0.2123 | 0.1641 |
| Validation Set Score | 0.9413 | 0.2819 | 0.2122 | 0.1627 |
def Plot_history(history, Title = False, Table_Rows = 25):
fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
specs=[[{"type": "scatter"},{"type": "table"}]])
# Left
fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Loss'].values,
line=dict(color='OrangeRed', width= 1.5), name = 'Loss'), 1, 1)
fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Accuracy'].values,
line=dict(color='MidnightBlue', width= 1.5), name = 'Accuracy'), 1, 1)
fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['MAE'].values,
line=dict(color='ForestGreen', width= 1.5), name = 'Mean Absolute Error (MAE)'), 1, 1)
fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['MSE'].values,
line=dict(color='purple', width= 1.5), name = 'Mean Squared Error (MSE)'), 1, 1)
fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
legend_orientation='h')
fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
showgrid=True, gridwidth=1, gridcolor='Lightgray',
showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
fig.update_yaxes(range=[0, 1.6], showgrid=True, gridwidth=1, gridcolor='Lightgray',
showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
# Right
ind = np.linspace(0, history.shape[0], Table_Rows, endpoint = False).round(0).astype(int)
ind = np.append(ind, history.Iteration.values[-1])
history = history[history.index.isin(ind)]
Temp = []
for i in history.columns:
Temp.append(history.loc[:,i].astype(float).round(4).values)
fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color='darkslategray',
fill_color='DimGray', align=['center','center'],
font=dict(color='white', size=12), height=25), columnwidth = [0.4, 0.4, 0.4, 0.4],
cells=dict(values=Temp, line_color='darkslategray', fill=dict(color=['WhiteSmoke', 'white']),
align=['center', 'center'], font_size=12,height=20)), 1, 2)
if Title != False:
fig.update_layout(plot_bgcolor= 'white',
title={'text': Title, 'x':0.46, 'y':0.94, 'xanchor': 'center', 'yanchor': 'top'},
yaxis_title='Frequency')
fig.show()
Plot_history(Train_Table, Title = 'Train Set')
Plot_history(Validation_Table, Title = 'Validation Set')
Labels = ['Very Low', 'Low', 'Medium', 'High']
# Train set
y_pred = model.predict(X_train)
Confusion_Matrix = confusion_matrix(y_train.values.argmax(axis=1), y_pred.argmax(axis=1))
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle('Train Set', fontsize = 18)
_ = sns.heatmap(Confusion_Matrix, annot=True, annot_kws={"size": 14}, cmap="Blues", ax = ax[0],
linewidths = 0.2, cbar_kws={"shrink": 1})
_ = ax[0].set_xlabel('Predicted labels')
_ = ax[0].set_ylabel('True labels');
_ = ax[0].set_title('Confusion Matrix');
_ = ax[0].xaxis.set_ticklabels(Labels)
_ = ax[0].yaxis.set_ticklabels(Labels)
Confusion_Matrix = Confusion_Matrix.astype('float') / Confusion_Matrix.sum(axis=1)[:, np.newaxis]
_ = sns.heatmap(Confusion_Matrix, annot=True, annot_kws={"size": 14}, cmap="Greens", ax = ax[1],
linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
_ = ax[1].set_xlabel('Predicted labels')
_ = ax[1].set_ylabel('True labels');
_ = ax[1].set_title('Normalized Confusion Matrix');
_ = ax[1].xaxis.set_ticklabels(Labels)
_ = ax[1].yaxis.set_ticklabels(Labels)
# Test set
y_pred = model.predict(X_test)
Confusion_Matrix = confusion_matrix(y_test.values.argmax(axis=1), y_pred.argmax(axis=1))
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle('Test Set', fontsize = 18)
_ = sns.heatmap(Confusion_Matrix, annot=True, annot_kws={"size": 14}, cmap="Blues", ax = ax[0],
linewidths = 0.2, cbar_kws={"shrink": 1})
_ = ax[0].set_xlabel('Predicted labels')
_ = ax[0].set_ylabel('True labels');
_ = ax[0].set_title('Confusion Matrix');
_ = ax[0].xaxis.set_ticklabels(Labels)
_ = ax[0].yaxis.set_ticklabels(Labels)
Confusion_Matrix = Confusion_Matrix.astype('float') / Confusion_Matrix.sum(axis=1)[:, np.newaxis]
_ = sns.heatmap(Confusion_Matrix, annot=True, annot_kws={"size": 14}, cmap="Greens", ax = ax[1],
linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
_ = ax[1].set_xlabel('Predicted labels')
_ = ax[1].set_ylabel('True labels');
_ = ax[1].set_title('Normalized Confusion Matrix');
_ = ax[1].xaxis.set_ticklabels(Labels)
_ = ax[1].yaxis.set_ticklabels(Labels)